RNA-Puzzle 20 - Twister sister ribozyme


In [23]:
import nglview
from rna_tools.Seq import RNASequence
from rna_tools.BlastPDB import BlastPDB
%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload

In [8]:
seq = RNASequence("ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU")

In [9]:
seq


Out[9]:
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU

Secondary structure prediction


In [10]:
print seq.predict_ss()


>rna_seq
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU
(((((..(((((.(((.((..((.(((((.......)))))...))..)).))).))).))..))))) (-32.50)

In [11]:
print seq.predict_ss(method='RNAsubopt')


>rna_seq [100]
ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU -32.50   1.00
(((((..(((((.(((.((..((.(((((.......)))))...))..)).))).))).))..))))) -32.50
.((((..(((((.(((.((..((.(((((.......)))))...))..)).))).))).))..)))). -31.90


In [12]:
print seq.predict_ss(method='ipknot', verbose=True)


ACCCGCAAGGCCGACGGCGCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU
..((....))(((..((((((...(((((.......)))))(((((....)))))))))))..)))..


In [13]:
print seq.predict_ss(method='contextfold')


(((((..........((((((...(((((.......)))))(((((....)))))))))))..)))))


Search Rfam


In [26]:
import rna_tools.RfamSearch as RfamSearch
rs = RfamSearch.RfamSearch()
print rs.cmscan(seq)


# cmscan :: search sequence(s) against a CM database
# INFERNAL 1.1.2 (July 2016)
# Copyright (C) 2016 Howard Hughes Medical Institute.
# Freely distributed under a BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query sequence file:                   /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpzCA8Uh.fa
# target CM database:                    /home/magnus/work/db/rfam/Rfam.cm
# sequence reporting threshold:          E-value <= 1
# number of worker threads:              4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       target  [L=68]
Hit scores:
 rank     E-value  score  bias  modelname       start    end   mdl trunc   gc  description
 ----   --------- ------ -----  -------------- ------ ------   --- ----- ----  -----------
  (1) !   1.2e-14   66.6   0.1  Twister-sister      1     68 +  cm    no 0.74  Twister_sister_ribozyme


Hit alignments:
>> Twister-sister  Twister_sister_ribozyme
 rank     E-value  score  bias mdl mdl from   mdl to       seq from      seq to       acc trunc   gc
 ----   --------- ------ ----- --- -------- --------    ----------- -----------      ---- ----- ----
  (1) !   1.2e-14   66.6   0.1  cm        4       79 ..           1          68 + [] 0.96    no 0.74

                                                                                                 NC
                    ((((-----(((,,<<<_______>>>,,<<<<_________>>>><<<<<<_____>>>>>>)))------)))) CS
  Twister-sister  4 ACCCGCaAGGCCGACGgauuacCccCGCCGCcGGUGCAAGCCCgGCCgCCCcagacagGGGcGGGCGCUCAUGGGU 79
                    ACCCGCAAGGCCGACGG+     +CCGCCGC:GGUGCAAG CC:GCC:C:C:    +:G:G:GGGCGCUCAUGGGU
          target  1 ACCCGCAAGGCCGACGGC-----GCCGCCGCUGGUGCAAGUCCAGCCACGCUU---CGGCGUGGGCGCUCAUGGGU 68
                    *****************4.....4***************************95...49****************** PP



Internal HMM-only pipeline statistics summary: (run for model(s) with zero basepairs)
--------------------------------------------------------------------------------------
Query sequence(s):                                               1  (136 residues searched)
Target model(s):                                               347  (40911 consensus positions)
Windows   passing  local HMM SSV           filter:               3  (0.004323); expected (0.02)
Windows   passing  local HMM MSV      bias filter:               3  (0.004323); expected (0.02)
Windows   passing  local HMM Viterbi       filter:               0  (0); expected (0.001)
Windows   passing  local HMM Forward       filter:               0  (0); expected (1e-05)
Total HMM hits reported:                                         0  (0)

Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s):                                               1  (136 residues searched)
Query sequences re-searched for truncated hits:                  1  (395.9 residues re-searched, avg per model)
Target model(s):                                              2669  (345034 consensus positions)
Windows   passing  local HMM SSV           filter:            1219  (0.05374); expected (0.35)
Windows   passing  local HMM Viterbi       filter:                  (off)
Windows   passing  local HMM Viterbi  bias filter:                  (off)
Windows   passing  local HMM Forward       filter:             557  (0.02511); expected (0.02)
Windows   passing  local HMM Forward  bias filter:             411  (0.01852); expected (0.02)
Windows   passing glocal HMM Forward       filter:             108  (0.004885); expected (0.02)
Windows   passing glocal HMM Forward  bias filter:              93  (0.004192); expected (0.02)
Envelopes passing glocal HMM envelope defn filter:              90  (0.003643); expected (0.02)
Envelopes passing  local CM  CYK           filter:              20  (0.00066); expected (0.0001)
Total CM hits reported:                                          1  (4.79e-05); includes 0 truncated hit(s)

Total CM and HMM hits reported:                                  1

# CPU time: 2.68u 0.18s 00:00:02.86 Elapsed: 00:00:01.07
//
[ok]


In [28]:
import rna_pdb_tools.utils.rna_alignment.rna_alignment as ra
a = ra.RNAalignment(fetch="RF02681")# '/Users/magnus/work/sister_twister/RAGATH-3-twister-sister.sto')
a


Out[28]:
SingleLetterAlphabet() alignment with 4 rows and 85 columns
GCAACCCGCAAGGCCGACGCACAAC---GCGCCGCCGGUGCAAG...ACA ADJS01013948.1/250-330
GAAACCCGCUAGGCCGACAGCCUCACCGCUGCCGCUGGUGCAAG...CAG BABG01005008.1/780-696
ACGACCCGCAAGGCCGACGCAUAAC---GCGCCGCCGGUGCAAG...ACA ADJS01013948.1/577-657
AUGACCCGCAAGGCCGACGGCAUCCCG-CCGCCGCUGGUGCAAG...ACA FP929046.1/2708602-2708521

In [30]:
#print a.io.format("fasta")
ra.RChie().plot_cov(a.io.format("fasta"), a.ss_cons_std, verbose=False)


Rchie: plot saved to rchie.png
Out[30]:

In [31]:
a.find_seq("UGCAAGUC")


Not found

In [28]:
import rna_pdb_tools.RfamSearch as rf
seq = Seq.Seq("GCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU")
rs = rf.RfamSearch()
print rs.cmscan(seq)


GCCGCCGCUGGUGCAAGUCCAGCCACGCUUCGGCGUGGGCGCUCAUGGGU
# cmscan :: search sequence(s) against a CM database
# INFERNAL 1.1.2 (July 2016)
# Copyright (C) 2016 Howard Hughes Medical Institute.
# Freely distributed under a BSD open source license.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# query sequence file:                   /var/folders/yc/ssr9692s5fzf7k165grnhpk80000gp/T/tmpJZab9J.fa
# target CM database:                    /home/magnus/work/db/rfamdb/Rfam.cm
# sequence reporting threshold:          E-value <= 1
# number of worker threads:              4
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

Query:       target  [L=50]
Hit scores:
 rank     E-value  score  bias  modelname  start    end   mdl trunc   gc  description
 ----   --------- ------ -----  --------- ------ ------   --- ----- ----  -----------

   [No hits detected that satisfy reporting thresholds]


Hit alignments:

   [No hits detected that satisfy reporting thresholds]


Internal HMM-only pipeline statistics summary: (run for model(s) with zero basepairs)
--------------------------------------------------------------------------------------
Query sequence(s):                                               1  (100 residues searched)
Target model(s):                                               347  (40911 consensus positions)
Windows   passing  local HMM SSV           filter:               2  (0.002882); expected (0.02)
Windows   passing  local HMM MSV      bias filter:               2  (0.002882); expected (0.02)
Windows   passing  local HMM Viterbi       filter:               0  (0); expected (0.001)
Windows   passing  local HMM Forward       filter:               0  (0); expected (1e-05)
Total HMM hits reported:                                         0  (0)

Internal CM pipeline statistics summary:
----------------------------------------
Query sequence(s):                                               1  (100 residues searched)
Query sequences re-searched for truncated hits:                  1  (296.8 residues re-searched, avg per model)
Target model(s):                                              2126  (267652 consensus positions)
Windows   passing  local HMM SSV           filter:             612  (0.03466); expected (0.35)
Windows   passing  local HMM Viterbi       filter:                  (off)
Windows   passing  local HMM Viterbi  bias filter:                  (off)
Windows   passing  local HMM Forward       filter:             224  (0.01279); expected (0.02)
Windows   passing  local HMM Forward  bias filter:             193  (0.01105); expected (0.02)
Windows   passing glocal HMM Forward       filter:              41  (0.002341); expected (0.02)
Windows   passing glocal HMM Forward  bias filter:              38  (0.002163); expected (0.02)
Envelopes passing glocal HMM envelope defn filter:              37  (0.001969); expected (0.02)
Envelopes passing  local CM  CYK           filter:               8  (0.0003414); expected (0.0001)
Total CM hits reported:                                          0  (0); includes 0 truncated hit(s)

Total CM and HMM hits reported:                                  0

# CPU time: 1.10u 0.21s 00:00:01.31 Elapsed: 00:00:00.74
//
[ok]

PDB Blast search


In [29]:
p = BlastPDB(seq.seq)
p.search()
print p.result


<HTML>
<TITLE>BLAST Search Results</TITLE>
<BODY BGCOLOR="#FFFFFF" LINK="#0000FF" VLINK="#660099" ALINK="#660099">
<PRE>
<b>BLASTN 2.2.18 [Mar-02-2008]</b>


<b><a href="http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed&cmd=Retrieve&list_uids
=9254694&dopt=Citation">Reference</a>:</b>
Altschul, Stephen F., Thomas L. Madden, Alejandro A. Sch&auml;ffer, 
Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), 
"Gapped BLAST and PSI-BLAST: a new generation of protein database search
programs",  Nucleic Acids Res. 25:3389-3402.

<b>Query=</b> UNKNOWN_SEQUENCE
         (50 letters)

<b>Database:</b> pdb_nucleotide 
           16,795 sequences; 3,045,280 total letters

Searching..................................................done

<PRE>


                                                                 Score    E
Sequences producing significant alignments:                      (bits) Value

2AU4:1:A|pdbid|entity|chain(s)|sequence                               <a href = #3797> 24</a>   5.1  
</PRE>
<PRE>
><a name = 3797></a>2AU4:1:A|pdbid|entity|chain(s)|sequence
          Length = 41

 Score = 24.3 bits (12), Expect = 5.1
 Identities = 12/12 (100%)
 Strand = Plus / Plus

                      
Query: 26 cgcttcggcgtg 37
          ||||||||||||
Sbjct: 18 cgcttcggcgtg 29
</PRE>


<PRE>
  Database: pdb_nucleotide
    Posted date:  Jun 4, 2017  7:12 AM
  Number of letters in database: 3,045,280
  Number of sequences in database:  16,795
  
Lambda     K      H
    1.37    0.711     1.31 

Gapped
Lambda     K      H
    1.37    0.711     1.31 


Matrix: blastn matrix:1 -3
Gap Penalties: Existence: 5, Extension: 2
Number of Sequences: 16795
Number of Hits to DB: 1662
Number of extensions: 13
Number of successful extensions: 12
Number of sequences better than 10.0: 1
Number of HSP's gapped: 12
Number of HSP's successfully gapped: 1
Length of query: 50
Length of database: 3,045,280
Length adjustment: 13
Effective length of query: 37
Effective length of database: 2,826,945
Effective search space: 104596965
Effective search space used: 104596965
X1: 10 (19.8 bits)
X2: 15 (29.7 bits)
X3: 50 (99.1 bits)
S1: 10 (20.3 bits)
S2: 12 (24.3 bits)
</PRE>
</BODY>
</HTML>

3D structure analysis


In [12]:
from rna_pdb_tools.pdb_parser_lib import RNAStructure

fn = "rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb"

s = RNAStructure(fn)
print s.get_report()
print s.get_info_chains()
print s.get_head()
#print s.view() # image paste here :-)


The RNARNAStructure report: rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb 
A:1-43 B:1-10
ATOM      1  P     G A   1     -12.509  18.639  13.726  1.00  0.00
ATOM      2  OP1   G A   1     -13.934  18.507  14.168  1.00  0.00
ATOM      3  OP2   G A   1     -11.541  17.557  14.097  1.00  0.00
ATOM      4  O5'   G A   1     -12.604  18.683  12.146  1.00  0.00
ATOM      5  C5'   G A   1     -13.512  19.569  11.525  1.00  0.00

In [13]:
%%bash
cd rna_pdb_tools
./rna-pdb-tools.py --no_hr --get_seq data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb


bash: line 2: ./rna-pdb-tools.py: No such file or directory

RNA 3D structure prediction


In [14]:
# model using SimRNA
#res = SimRNA(ss,seq.get_ss())

In [15]:
# fake import, should be 
res = "rna_pdb_tools/data/260c8ff6-f24e-4eff-9760-1831407fc770_ALL_thrs5.30A_clust01-000001_AA.pdb"
# view
view = nglview.show_structure_file(res)
view.add_representation(repr_type='cartoon')
view

rna_pdb_tools --get_seq


In [16]:
%%bash
cd rna_pdb_tools
./rna-pdb-tools.py --no_hr --get_seq input/5k7c.pdb


bash: line 2: ./rna-pdb-tools.py: No such file or directory

In [17]:
%%bash
cd rna_pdb_tools
./rna-pdb-tools.py --no_hr --get_seq input/5k7c.pdb
./rna-pdb-tools.py --no_hr --get_seq input/tetraloop.pdb
./rna-pdb-tools.py --get_seq input/1xjr.pdb


bash: line 2: ./rna-pdb-tools.py: No such file or directory
bash: line 3: ./rna-pdb-tools.py: No such file or directory
bash: line 4: ./rna-pdb-tools.py: No such file or directory

In [ ]: